# Load necessary libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(bipartite)  # For network analysis
## Warning: package 'bipartite' was built under R version 4.3.3
## Loading required package: vegan
## Warning: package 'vegan' was built under R version 4.3.3
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-6.1
## Loading required package: sna
## Warning: package 'sna' was built under R version 4.3.3
## Loading required package: statnet.common
## 
## Attaching package: 'statnet.common'
## The following objects are masked from 'package:base':
## 
##     attr, order
## Loading required package: network
## 
## 'network' 1.18.2 (2023-12-04), part of the Statnet Project
## * 'news(package="network")' for changes since last version
## * 'citation("network")' for citation information
## * 'https://statnet.org' for help, support, and other information
## sna: Tools for Social Network Analysis
## Version 2.8 created on 2024-09-07.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##  For citation information, type citation("sna").
##  Type help(package="sna") to get started.
##  This is bipartite 2.20.
##  For latest changes see versionlog in ?"bipartite-package". For citation see: citation("bipartite").
##  Have a nice time plotting and analysing two-mode networks.
## 
## Attaching package: 'bipartite'
## The following object is masked from 'package:vegan':
## 
##     nullmodel
library(vegan)
library(tibble)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:lubridate':
## 
##     %--%, union
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## The following object is masked from 'package:bipartite':
## 
##     strength
## The following objects are masked from 'package:sna':
## 
##     betweenness, bonpow, closeness, components, degree, dyad.census,
##     evcent, hierarchy, is.connected, neighborhood, triad.census
## The following objects are masked from 'package:network':
## 
##     %c%, %s%, add.edges, add.vertices, delete.edges, delete.vertices,
##     get.edge.attribute, get.edges, get.vertex.attribute, is.bipartite,
##     is.directed, list.edge.attributes, list.vertex.attributes,
##     set.edge.attribute, set.vertex.attribute
## The following object is masked from 'package:vegan':
## 
##     diversity
## The following object is masked from 'package:permute':
## 
##     permute
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(parallel)
library(foreach)
## Warning: package 'foreach' was built under R version 4.3.3
library(doParallel)
## Warning: package 'doParallel' was built under R version 4.3.3
## Loading required package: iterators
## Warning: package 'iterators' was built under R version 4.3.3
library(sf)
## Warning: package 'sf' was built under R version 4.3.3
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(rnaturalearth)
library(rnaturalearthdata)
## Warning: package 'rnaturalearthdata' was built under R version 4.3.3
## 
## Attaching package: 'rnaturalearthdata'
## The following object is masked from 'package:rnaturalearth':
## 
##     countries110
library(geosphere)
## Warning: package 'geosphere' was built under R version 4.3.3
library(readr)

# Read the data
# Read CSV with network_id as character
interactions <- read_csv("Interaction_data.csv",
                         col_types = cols(
                           Network_id = col_character()
                         ))

# Summarize network-level information
network_summary <- interactions %>%
  group_by(Study_id,Network_id, Country, Bioregion, EuPPollNet_habitat) %>%
  summarise(
    n_plants = n_distinct(Plant_original_name),
    n_pollinators = n_distinct(Pollinator_original_name),
    .groups = "drop"
  )

# Inspect result
head(network_summary)
## # A tibble: 6 × 7
##   Study_id     Network_id Country Bioregion   EuPPollNet_habitat   n_plants
##   <chr>        <chr>      <chr>   <chr>       <chr>                   <int>
## 1 10_Vanbergen FR_V02     France  Continental Agricultural land           6
## 2 10_Vanbergen FR_V02     France  Continental Intensive grasslands        2
## 3 10_Vanbergen FR_V03     France  Continental Agricultural land           7
## 4 10_Vanbergen FR_V03     France  Continental Forest/woodland            16
## 5 10_Vanbergen FR_V03     France  Continental Intensive grasslands        1
## 6 10_Vanbergen FR_V03     France  Continental Riparian vegetation         3
## # ℹ 1 more variable: n_pollinators <int>
# Number of unique networks
n_networks <- network_summary %>%
  distinct(Network_id, Study_id) %>%
  count()

print(n_networks)
## # A tibble: 1 × 1
##       n
##   <int>
## 1  1630
# Years of data 
# Convert to proper Date type
interactions$Date <- as.Date(interactions$Date)

# Extract year and month
interactions <- interactions %>%
  mutate(Year = year(Date),
         Month = month(Date, label = TRUE, abbr = TRUE))   # e.g. "Jan", "Feb"
networks_per_year <- interactions %>%
  distinct(Network_id, Year) %>%
  count(Year)

ggplot(networks_per_year, aes(x = Year, y = n)) +
  geom_col(fill = "steelblue") +
  theme_minimal() +
  labs(title = "Temporal coverage of networks",
       x = "Year", y = "Number of networks")

# Count distinct studies in each country
studies_per_country <- network_summary %>%
  distinct(Study_id, Country) %>%
  count(Country, name = "Num_studies")

print(studies_per_country)
## # A tibble: 23 × 2
##    Country        Num_studies
##    <chr>                <int>
##  1 Austria                  1
##  2 Belgium                  1
##  3 Bulgaria                 1
##  4 Czech Republic           4
##  5 Denmark                  1
##  6 England                  1
##  7 Estonia                  2
##  8 Finland                  1
##  9 France                   3
## 10 Germany                  9
## # ℹ 13 more rows
# Plot
ggplot(studies_per_country, aes(x = reorder(Country, -Num_studies), y = Num_studies)) +
  geom_col(fill = "darkseagreen") +
  theme_minimal() +
  labs(title = "Number of Studies per Country",
       x = "Country", y = "Number of Studies") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(breaks = seq(0, max(studies_per_country$Num_studies), by = 2))

# Interactions per network 
inter_per_net <- interactions %>%
  group_by(Network_id) %>%
  summarise(Total_Interactions = sum(Interaction)) %>%
  arrange(desc(Total_Interactions))

# Get unique Network_id - Bio-region pairs
region_per_net <- interactions %>%
  select(Network_id, Bioregion) %>%
  distinct()

# Join with interaction summary
inter_per_net_with_region <- inter_per_net %>%
  left_join(region_per_net, by = "Network_id")

# Count networks per bio-region
networks_per_bioregion <- network_summary %>%
  select(Network_id, Bioregion) %>%
  distinct() %>%
  count(Bioregion)

# Plot
ggplot(networks_per_bioregion, aes(x = reorder(Bioregion, -n), y = n)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  labs(title = "Number of Networks per Bioregion",
       x = "Bioregion", y = "Number of Networks") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Count networks per country
networks_per_country <- network_summary %>%
  select(Network_id, Country) %>%
  distinct() %>%
  count(Country)

# Plot count of networks per country 
ggplot(networks_per_country, aes(x = reorder(Country, -n), y = n)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(title = "Number of Networks per Country",
       x = "Country", y = "Number of Networks") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Get unique network country bio-region combinations
country_bioregion <- network_summary %>%
  select(Network_id, Country, Bioregion) %>%
  distinct()

# Count networks per country and attach bior-egion
networks_per_country <- country_bioregion %>%
  distinct(Network_id, Country, Bioregion) %>%
  count(Country, Bioregion)

# Plot
ggplot(networks_per_country, aes(x = reorder(Country, -n), y = n, fill = Bioregion)) +
  geom_bar(stat = "identity") +
  labs(title = "Number of Networks per Country (Colored by Bioregion)",
       x = "Country", y = "Number of Networks") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_brewer(palette = "Set2")

#---- Interactions analysis ---- 

# Create interaction matrix: rows = networks, columns = species
#pollinators
mat_pollinators <- interactions %>%
  select(Network_id, Pollinator_accepted_name) %>%
  distinct() %>%
  mutate(present = 1) %>%
  pivot_wider(names_from = Pollinator_accepted_name, values_from = present, values_fill = 0) %>%
  column_to_rownames("Network_id") %>%
  as.matrix()

#plants
mat_plants <- interactions %>%
  select(Network_id, Plant_accepted_name) %>%
  distinct() %>%
  mutate(present = 1) %>%
  pivot_wider(names_from = Plant_accepted_name, values_from = present, values_fill = 0) %>%
  column_to_rownames("Network_id") %>%
  as.matrix()

#Pollinators & plants
# Create interaction matrix: rows = plants species, columns = pollinators species
mat_plant_x_poll <- interactions %>%
  select(Plant_accepted_name, Pollinator_accepted_name) %>%
  distinct() %>%                              
  mutate(present = 1L) %>%
  pivot_wider(names_from = Pollinator_accepted_name,
              values_from = present, values_fill = 0) %>%
  column_to_rownames("Plant_accepted_name") %>%
  as.matrix()

# Accumulation curves 
# accumulation curve for pollinators 
spec_curve <- specaccum(mat_pollinators, method = "random")

df_curve <- data.frame(
  Sites = spec_curve$sites,
  Richness = spec_curve$richness,
  SD = spec_curve$sd
)

# graphing 
ggplot(df_curve, aes(x = Sites, y = Richness)) +
  geom_line(color = "black", size = 1.2) +                       
  geom_ribbon(aes(ymin = Richness - SD, ymax = Richness + SD), 
              fill = "grey", alpha = 0.5) +                       
  geom_line(data = df_curve, aes(x = Sites, y = Richness))+
  labs(x = "Networks", y = "Pollinator species",
       title = "Pollinator Accumulation Curve") +
  theme_minimal(base_size = 14)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# accumulation curve for plants
spec_curve_plants <- specaccum(mat_plants, method = "random")

df_curve_plants <- data.frame(
  Sites = spec_curve_plants$sites,
  Richness = spec_curve_plants$richness,
  SD = spec_curve_plants$sd
)

# graphing 
ggplot(df_curve_plants, aes(x = Sites, y = Richness)) +
  geom_line(color = "black", size = 1.2) +                       
  geom_ribbon(aes(ymin = Richness - SD, ymax = Richness + SD), 
              fill = "grey", alpha = 0.5) +                       
  geom_line(data = df_curve_plants, aes(x = Sites, y = Richness))+
  labs(x = "Networks", y = "Plants species",
       title = "Plants Accumulation Curve") +
  theme_minimal(base_size = 14)

# accumulation curve for plants and pollinators 
spec_curve_d <- specaccum(mat_plant_x_poll, method = "random", permutations = 200)

df_d <- data.frame(
  Plants      = spec_curve_d$sites,     
  Pollinators = spec_curve_d$richness,   
  SD          = spec_curve_d$sd
)

#graphing
ggplot(df_d, aes(x = Plants, y = Pollinators)) +
  geom_ribbon(aes(ymin = Pollinators - SD, ymax = Pollinators + SD),
              fill = "grey", alpha = 0.5) +
  geom_line(color = "black", size = 1.2) +
  labs(x = "Plant species", y = "Pollinator species",
       title = "Aaccumulation curve of Pollinator across Plant") +
  theme_minimal(base_size = 14)

#---- Species distribution ----

# Calculate occurrence proportion for each pollinator
poll_occurrence <- interactions %>%
  select(Network_id, Pollinator_accepted_name) %>%
  distinct() %>%
  count(Pollinator_accepted_name) %>%
  mutate(freq = n / n_distinct(interactions$Network_id)) %>%
  arrange(desc(freq)) %>%
  mutate(rank = row_number())

# Plots
ggplot(poll_occurrence, aes(x = n)) +
  geom_histogram(binwidth = 1,  color = "purple") +
  labs(title = "Distribution of Number of Networks per Number of Pollinators",
       x = "Number of networks",
       y = "Number of pollinator species") +
  theme_minimal()

ggplot(poll_occurrence, aes(x = freq)) +
  geom_histogram(binwidth = 0.05, fill = "purple", color = "black") +
  labs(title = "Distribution of Pollinator Frequencies",
       x = "Frequency across networks",
       y = "Number of species") +
  theme_minimal()

# # Top pollinators by number of networks
# top_pollinators <- poll_occurrence %>%
#   arrange(desc(n)) %>%
#   head(70)   # change 10 to however many top species you want
# 
# # Join with metadata about networks
# top_poll_networks <- interactions %>%   # replace with your full data frame
#   filter(Pollinator_accepted_name %in% top_pollinators$pollinator) %>%
#   distinct(Pollinator_accepted_name, Network_id, Country)
# 
# ggplot(top_pollinators, aes(x = reorder(Pollinator_accepted_name, n), y = n)) +
#   geom_col(fill = "darkgreen") +
#   coord_flip() +
#   labs(title = "Top Shared Pollinators Across Networks",
#        x = "Pollinator species", y = "Number of networks") +
#   theme_minimal()

# Calculate occurrence proportion for each plant
plant_occurrence <- interactions %>%
  select(Network_id, Plant_accepted_name) %>%
  distinct() %>%
  count(Plant_accepted_name) %>%
  mutate(freq = n / n_distinct(interactions$Network_id)) %>%
  arrange(desc(freq)) %>%
  mutate(rank = row_number())

# Plot
ggplot(plant_occurrence, aes(x = n)) +
  geom_histogram(binwidth = 1,  color = "seagreen") +
  labs(title = "Distribution of Number of Networks per Number of Plants",
       x = "Number of networks",
       y = "Number of Plants species") +
  theme_minimal()

ggplot(plant_occurrence, aes(x = freq)) +
  geom_histogram(binwidth = 0.05, fill = "seagreen", color = "black") +
  labs(title = "Distribution of Plants Frequencies",
       x = "Frequency across networks",
       y = "Number of species") +
  theme_minimal()

#---- Habitats ----

# Count unique networks per habitat
habitat_counts <- network_summary %>%
  select(Network_id, EuPPollNet_habitat) %>%
  distinct() %>%
  count(EuPPollNet_habitat, name = "Num_networks")

# Plot
ggplot(habitat_counts, aes(x = reorder(EuPPollNet_habitat, Num_networks), y = Num_networks)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  coord_flip() +
  labs(title = "Number of Networks per Habitat",
       x = "Habitat", y = "Number of Networks") +
  theme_minimal()

# Habitats per country 
habitat_country_counts <- network_summary %>%
  select(Network_id, EuPPollNet_habitat, Country) %>%
  distinct() %>%
  count(EuPPollNet_habitat, Country)

ggplot(habitat_country_counts, aes(x = reorder(EuPPollNet_habitat, -n), y = n, fill = Country)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(title = "Networks per Habitat and Country",
       x = "Habitat", y = "Number of Networks") +
  theme_minimal()

ggplot(habitat_country_counts, aes(x = reorder(EuPPollNet_habitat, -n), y = n)) +
  geom_bar(stat = "identity", fill = "darkseagreen") +
  coord_flip() +
  facet_wrap(~ Country, scales = "free_x") +
  labs(title = "Networks per Habitat by Country",
       x = "Habitat", y = "Number of Networks") +
  theme_minimal(base_size = 6)

#-- bioregion ----

# Summarize key metrics by bioregion
bioregion_summary <- network_summary %>%
  group_by(Bioregion) %>%
  summarise(
    Num_Networks = n_distinct(Network_id),
    Avg_Plants = mean(n_plants, na.rm = TRUE),
    Avg_Pollinators = mean(n_pollinators, na.rm = TRUE),
    Total_Interactions = sum(inter_per_net$Total_Interactions[match(Network_id, inter_per_net$Network_id)], na.rm = TRUE),
    .groups = "drop"
  )



# ---summarizing species per habitat and pollinator order----

#---- Network Analysis ----

# net_species_counts <- network_summary %>%
#   select(Network_id, Plant_accepted_name, Pollinator_accepted_name) %>%
#   distinct() %>%
#   group_by(Network_id) %>%
#   summarise(
#     N_plants = n_distinct(Plant_accepted_name),
#     N_pollinators = n_distinct(Pollinator_accepted_name),
#     .groups = "drop"
#   )






# Plot geom point graph for the number pf plants an dpollinators 
ggplot(network_summary, aes(x = n_plants, y = n_pollinators)) +
  geom_point(alpha = 0.6, color = "darkblue") +
  labs(title = "Network Size: Number of Species per Network",
       x = "Number of Plant Species", y = "Number of Pollinator Species") +
  theme_minimal()

# Plot geom point graph for the number of plants and pollinators with bioregion colors
ggplot(network_summary, aes(x = n_plants, y = n_pollinators, color = Bioregion)) +
  geom_point(alpha = 0.6, size = 2) +
  labs(title = "Number of Species per Network",
       x = "Number of Plant Species", 
       y = "Number of Pollinator Species",
       color = "Bioregion") +
  theme_minimal() +
  theme(legend.position = "right") +
  scale_color_brewer(palette = "Set2")  # Or use scale_color_viridis_d() for more colors

# Filter and plot large networks (e.g., N_plants and N_pollinators >= 10, adjust threshold as needed)
large_net_threshold <- 30  # Define threshold for "large" networks
large_net_counts <- network_summary %>%
  filter(n_plants >= large_net_threshold & n_pollinators >= large_net_threshold)

# # Join with country information
# large_net_with_country <- large_net_counts %>%
#   inner_join(interactions %>% distinct(Network_id), by = "Network_id")

ggplot(large_net_counts, aes(x = n_plants, y = n_pollinators, color = Country)) +
  geom_point(alpha = 0.6, size = 3) +  # Increased size to 3 (adjust as needed)
  labs(title = paste0("Large Networks (≥ ", large_net_threshold, " Species)"),
       x = "Number of Plant Species", y = "Number of Pollinator Species") +
  theme_minimal() +
  theme(legend.position = "right")  # Adjust legend position as needed

# Summaries total interaction count per network and country
net_interactions_country <- interactions %>%
  group_by(Network_id, Country) %>%
  summarise(Total_interactions = sum(Interaction), .groups = "drop")

# Plot boxplot of interaction counts by country
ggplot(net_interactions_country, aes(x = Country, y = Total_interactions)) +
  geom_boxplot(fill = "lightblue") +
  labs(title = "Number of Interactions per Network by Country",
       x = "Country", y = "Total Number of Interactions") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# For indexes calculations 

# Create presence column (1 = presents)
interactions$present <- ifelse(interactions$Interaction > 0, 1, 0)

# Create a named list of matrices (one per network)
network_mats <- interactions %>%
  select(Network_id, Plant_accepted_name, Pollinator_accepted_name, present) %>%
  distinct() %>%
  pivot_wider(
    names_from = Pollinator_accepted_name,
    values_from = present,
    values_fill = 0
  ) %>%
  split(.$Network_id)

# Convert each to matrix
network_matrices <- lapply(network_mats, function(df) {
  mat <- as.matrix(df[,-1])
  rownames(mat) <- df$Plant_accepted_name
  mat
})

# # calculating the indexes for each network 
# network_stats <- lapply(network_matrices, function(m) {
#   m_bin <- (m > 0) * 1
#   data.frame(
#     Connectance = networklevel(m_bin, index = "connectance"),
#     Nestedness  = networklevel(m_bin, index = "nestedness")
#     # H2          = networklevel(m_bin, index = "H2")
#   )
# }) %>% bind_rows(.id = "Network_id")
# 
# # box plot for the connectance
# network_stats_full <- network_stats %>%
#   left_join(interactions %>% distinct(Network_id, Country, Bioregion, EuPPollNet_habitat), by = "Network_id")
# ggplot(network_stats_full, aes(x = Country, y = Connectance)) +
#   geom_boxplot(fill = "lightgreen") +
#   theme_minimal() +
#   labs(title = "Connectance Across Countries", y = "Connectance", x = "Country") +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1))
# 
# # box plot for the nestedness
# ggplot(network_stats_full, aes(x = Country, y = Nestedness)) +
#   geom_boxplot(fill = "salmon") +
#   theme_minimal() +
#   labs(title = "Nestedness Across Countries", y = "Nestedness", x = "Country") +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Set up parallel backend
cores <- detectCores() - 1
registerDoParallel(cores)

# Step 1: Get network sizes to filter small networks
network_sizes <- lapply(names(network_matrices), function(net_id) {
  m <- network_matrices[[net_id]]
  data.frame(Network_id = net_id, N_plants = nrow(m), N_pollinators = ncol(m))
}) %>% bind_rows()

# Filter large networks (e.g., >=3 plants and >=3 pollinators)
large_networks <- network_sizes %>%
  filter(N_plants >= 3, N_pollinators >= 3) %>%
  pull(Network_id)

# Step 2: Calculate network metrics for large networks
network_stats_large <- foreach(net_id = large_networks, .combine = rbind, 
                               .packages = c("igraph", "bipartite")) %dopar% {
                                 m_bin <- (network_matrices[[net_id]] > 0) * 1  # Binary matrix
                                 g <- graph_from_incidence_matrix(m_bin, directed = FALSE)
                                 g_poll <- bipartite.projection(g)$proj2  # Pollinator projection
                                 
                                 # Network metrics
                                 net_levels <- networklevel(m_bin, index = c("connectance", "NODF"))
                                 connectance <- net_levels["connectance"]
                                 nodf <- net_levels["NODF"]
                                 evenness <- networklevel(m_bin, index = "interaction evenness")
                                 
                                 # Modularity (skip if projected network too small)
                                 mod <- if (vcount(g_poll) >= 5 && ecount(g_poll) >= 5) {
                                   comm <- cluster_fast_greedy(g_poll)
                                   modularity(g_poll, membership(comm))
                                 } else {
                                   NA
                                 }
                                 
                                 data.frame(
                                   Network_id = net_id,  # Defined within the loop
                                   Connectance = connectance,
                                   Nestedness_NODF = nodf,
                                   Modularity = mod,
                                   Interaction_Evenness = evenness
                                 )
                               }

# Stop parallel backend
stopImplicitCluster()

# Step 3: Assign NA to small networks
small_networks <- setdiff(names(network_matrices), large_networks)
network_stats_small <- data.frame(
  Network_id = small_networks,
  Connectance = NA,
  Nestedness_NODF = NA,
  Modularity = NA,
  Interaction_Evenness = NA
)

# Combine results
network_stats <- bind_rows(network_stats_large, network_stats_small)

# Join with metadata
network_stats_full <- network_stats %>%
  left_join(interactions %>% distinct(Network_id, Country, Bioregion, EuPPollNet_habitat), by = "Network_id") %>%
  left_join(network_sizes, by = "Network_id")

# Visualizations
# Connectance
ggplot(network_stats_full, aes(x = Country, y = Connectance)) +
  geom_boxplot(fill = "lightgreen") +
  theme_minimal() +
  labs(title = "Connectance Across Countries", y = "Connectance", x = "Country") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 165 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Nestedness NODF
ggplot(network_stats_full, aes(x = Country, y = Nestedness_NODF)) +
  geom_boxplot(fill = "salmon") +
  theme_minimal() +
  labs(title = "Nestedness (NODF) Across Countries", y = "NODF", x = "Country") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 165 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Modularity
ggplot(network_stats_full %>% filter(!is.na(Modularity)), aes(x = Country, y = Modularity)) +
  geom_boxplot(fill = "lightpink") +
  theme_minimal() +
  labs(title = "Modularity Across Countries (Large Networks Only)", y = "Modularity", x = "Country") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Interaction Evenness
ggplot(network_stats_full, aes(x = Country, y = Interaction_Evenness)) +
  geom_boxplot(fill = "lightblue") +
  theme_minimal() +
  labs(title = "Interaction Evenness Across Countries", y = "Evenness", x = "Country") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Removed 165 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Summary
cat("Number of small networks (skipped):", nrow(network_stats_small), "\n")
## Number of small networks (skipped): 165
cat("Number of large networks analyzed:", nrow(network_stats_large), "\n")
## Number of large networks analyzed: 1357
summary(network_stats_full[, c("Connectance", "Nestedness_NODF", "Modularity", "Interaction_Evenness")])
##   Connectance      Nestedness_NODF    Modularity       Interaction_Evenness
##  Min.   :0.02549   Min.   : 9.774   Min.   :-0.10000   Min.   :0.6301      
##  1st Qu.:0.14264   1st Qu.:25.645   1st Qu.: 0.07124   1st Qu.:0.6800      
##  Median :0.23810   Median :33.333   Median : 0.16320   Median :0.7099      
##  Mean   :0.25456   Mean   :36.259   Mean   : 0.15915   Mean   :0.7173      
##  3rd Qu.:0.34807   3rd Qu.:44.834   3rd Qu.: 0.24097   3rd Qu.:0.7467      
##  Max.   :0.77778   Max.   :78.089   Max.   : 0.53539   Max.   :0.8856      
##  NA's   :165       NA's   :165      NA's   :213        NA's   :165
# Step 1: Aggregate coordinates by Network_id
network_coords <- interactions %>%
  group_by(Network_id) %>%
  summarise(Latitude = mean(Latitude, na.rm = TRUE),
            Longitude = mean(Longitude, na.rm = TRUE)) %>%
  left_join(network_stats_full, by = "Network_id")

# Step 2: Get a European map
europe_map <- ne_countries(scale = "medium", continent = "Europe", returnclass = "sf")

# Step 3: Create a spatial plot (e.g., Modularity by location)
ggplot(data = europe_map) +
  geom_sf(fill = "lightgray", color = "white") +
  geom_point(data = network_coords, 
             aes(x = Longitude, y = Latitude, color = Modularity), 
             size = 2, alpha = 0.6) +
  scale_color_gradient(low = "blue", high = "red", na.value = "gray", 
                       name = "Modularity") +
  coord_sf(xlim = c(-10, 35), ylim = c(35, 72)) +
  theme_minimal() +
  labs(title = "Modularity Across European Plant-Pollinator Networks",
       x = "Longitude", y = "Latitude") +
  theme(legend.position = "right")

# Repeat for other metrics (e.g., Interaction_Evenness)
ggplot(data = europe_map) +
  geom_sf(fill = "lightgray", color = "white") +
  geom_point(data = network_coords, 
             aes(x = Longitude, y = Latitude, color = Interaction_Evenness), 
             size = 2, alpha = 0.6) +
  scale_color_gradient(low = "blue", high = "green", na.value = "gray", 
                       name = "Evenness") +
  coord_sf(xlim = c(-10, 35), ylim = c(35, 72)) +
  theme_minimal() +
  labs(title = "Interaction Evenness Across European Networks",
       x = "Longitude", y = "Latitude") +
  theme(legend.position = "right")

# Step 2: Extract coordinates for distance calculation
coords <- network_coords %>%
  select(Network_id, Latitude, Longitude) %>%
  distinct()

# Convert to matrix of coordinates
coord_matrix <- as.matrix(coords[, c("Longitude", "Latitude")])  # Order: lon, lat

# Step 3: Calculate pairwise distances (in kilometers) using Haversine
dist_matrix <- distm(coord_matrix, fun = distHaversine) / 1000  # Convert meters to km
rownames(dist_matrix) <- coords$Network_id
colnames(dist_matrix) <- coords$Network_id

# Step 4: Convert to long format for easier analysis
dist_long <- as.data.frame(as.table(dist_matrix)) %>%
  filter(!is.na(Freq)) %>%
  rename(Network1 = Var1, Network2 = Var2, Distance_km = Freq) %>%
  filter(Network1 != Network2)

# # Step 5: Summary statistics
# summary_stats <- dist_long %>%
#   summarise(
#     Min_Distance = min(Distance_km),
#     Mean_Distance = mean(Distance_km),
#     Max_Distance = max(Distance_km)
#   )
# print(summary_stats)
# 
# # Example: Find nearest neighbor
# nearest_neighbor <- dist_long %>%
#   group_by(Network1) %>%
#   slice_min(Distance_km, n = 1) %>%
#   rename(Nearest_Network = Network2, Nearest_Distance_km = Distance_km)
# network_coords <- network_coords %>%
#   left_join(nearest_neighbor, by = c("Network_id" = "Network1"))
# 
# # Optional Visualization: Map with distances
# ggplot(data = europe_map) +
#   geom_sf(fill = "lightgray", color = "white") +
#   geom_point(data = network_coords, aes(x = Longitude, y = Latitude, color = Nearest_Distance_km), size = 2, alpha = 0.6) +
#   scale_color_gradient(low = "blue", high = "red", na.value = "gray", name = "Nearest Distance (km)") +
#   coord_sf(xlim = c(-10, 35), ylim = c(35, 72)) +
#   theme_minimal() +
#   labs(title = "Nearest Neighbor Distances Across Networks",
#        x = "Longitude", y = "Latitude") +
#   theme(legend.position = "right")

# Spatial plot zoomed on Ireland
ggplot(data = europe_map) +
  geom_sf(fill = "lightgray", color = "white") +
  geom_point(data = network_coords, 
             aes(x = Longitude, y = Latitude), 
             size = 2, alpha = 0.6) +
  coord_sf(xlim = c(-10, -5), ylim = c(51, 55)) +
  theme_minimal() +
  labs(title = " Networks in Ireland",
       x = "Longitude", y = "Latitude") +
  theme(legend.position = "right")

# Spatial plot zoomed on Germany
ggplot(data = europe_map) +
  geom_sf(fill = "lightgray", color = "white") +
  geom_point(data = network_coords, 
             aes(x = Longitude, y = Latitude), 
             size = 2, alpha = 0.6) +
  coord_sf(xlim = c(5, 15), ylim = c(47, 55)) +  # Zoom to Germany
  theme_minimal() +
  labs(title = "Networks in Germany",
       x = "Longitude", y = "Latitude") +
  theme(legend.position = "right")

# # Species-level analysis - degree distribution 
# species_roles <- lapply(network_matrices, function(m) {
#   m_bin <- (m > 0) * 1
#   roles <- specieslevel(m_bin, index = c("degree", "normalized degree"))
#   # Combine plant + pollinator into one dataframe
#   df <- bind_rows(
#     as.data.frame(roles$`higher level`) %>%
#       rownames_to_column("Species") %>%
#       mutate(Level = "Plant"),
#     as.data.frame(roles$`lower level`) %>%
#       rownames_to_column("Species") %>%
#       mutate(Level = "Pollinator")
#   )
#   df
# }) %>%
#   bind_rows(.id = "Network_id")
# 
# # # Diagnose: which networks have >1 metadata row?
# # interactions %>%
# #   distinct(Network_id, Country, Bioregion, EuPPollNet_habitat) %>%
# #   count(Network_id, name = "n_meta_rows") %>%
# #   filter(n_meta_rows > 1)
# 
# # Majority Country per network
# country_mode <- network_summary %>%
#   count(Network_id, Country, name = "n") %>%
#   group_by(Network_id) %>%
#   slice_max(n, with_ties = FALSE) %>%
#   ungroup() %>%
#   select(Network_id, Country)
# 
# # Majority Bioregion per network
# bioregion_mode <- network_summary %>%
#   count(Network_id, Bioregion, name = "n") %>%
#   group_by(Network_id) %>%
#   slice_max(n, with_ties = FALSE) %>%
#   ungroup() %>%
#   select(Network_id, Bioregion)
# 
# # Majority Habitat per network
# habitat_mode <- network_summary %>%
#   count(Network_id, EuPPollNet_habitat, name = "n") %>%
#   group_by(Network_id) %>%
#   slice_max(n, with_ties = FALSE) %>%
#   ungroup() %>%
#   select(Network_id, EuPPollNet_habitat)
# 
# # Combine into one-row-per-network metadata
# meta_by_net <- country_mode %>%
#   left_join(bioregion_mode, by = "Network_id") %>%
#   left_join(habitat_mode, by = "Network_id")
# 
# # Safe join
# species_roles_full <- species_roles %>%
#   left_join(meta_by_net, by = "Network_id")
# 
# # Join with metadata
# species_roles_full <- species_roles %>%
#   left_join(interactions %>% 
#               distinct(Network_id, Country, Bioregion, EuPPollNet_habitat),
#             by = "Network_id")
# 
# # Explore top high degree
# top_generalists <- species_roles_full %>%
#   arrange(desc(degree)) %>%
#   group_by(Level) %>%
#   slice_head(n = 10)
# 
# print(top_generalists)
# # print(top_specialists)
# 
# # Plots
# ggplot(species_roles_full, aes(x = degree, fill = Level)) +
#   geom_histogram(bins = 30, alpha = 0.6, position = "identity") +
#   theme_minimal() +
#   labs(title = "Degree distribution of plants and pollinators",
#        x = "Degree", y = "Number of species")
#